/******************************************************************************
 * $Header: /boot/home/agmsmith/Programming/MatchUTF8/RCS/MatchUTF8.c,v 1.5 2002/07/28 22:02:06 agmsmith Exp $
 *
 * Wildcard Pattern Matching for the UTF-8 (a Unicode variation) character set.
 *
 * Based on match.c by J. Kercheval, dated from 1991.  Updated for UTF-8
 * character encoding by Alexander G. M. Smith, summer 2002.
 *
 * $Log: MatchUTF8.c,v $
 * Revision 1.5  2002/07/28 22:02:06  agmsmith
 * Added casts to unsigned char, as Brian Francis found that the PPC
 * compiler complains about converting from signed to unsigned.
 *
 * Revision 1.4  2002/07/27 22:30:58  agmsmith
 * Fix a few bugs, add more test cases.
 *
 * Revision 1.3  2002/07/27 20:17:54  agmsmith
 * Code reformatted and variables given better names.
 *
 * Revision 1.2  2002/07/25 20:57:14  agmsmith
 * Under reconstruction.
 *
 * A.G.M. Smith  Sat, 07/06/2002  11:18:33  Added UTF-8 character set support.
 * J. Kercheval  Tue, 03/12/1991  22:25:10  Released as V1.1 to Public Domain
 * J. Kercheval  Sun, 03/10/1991  20:37:11  beef up main()
 * J. Kercheval  Sun, 03/10/1991  20:11:11  add is_valid_pattern code
 * J. Kercheval  Sun, 03/10/1991  19:31:29  add error return to matche()
 * J. Kercheval  Fri, 02/22/1991  15:29:01  fix '\' bugs (two :( of them)
 * J. Kercheval  Wed, 02/20/1991  22:29:01  Released to Public Domain
 */

#include "MatchUTF8.h"

#define REPLACEMENT_UNICODE_CHAR (0xFFFD)


/******************************************************************************
 * This internal utility function reads the next character from a UTF-8
 * character encoded string and returns the 32 bit unicode character.  If it
 * can't decode the character, it returns 0xFFFD, the "replacement character".
 * See http://www.unicode.org/ for details.  If not NULL, the pointer specified
 * by EndPntrPntr is set to point at the start of the next UTF-8 character.
 * The end pointer never moves past the terminating NUL byte of the string.
 *
 * Here is how Unicode scalar value characters are mapped to UTF-8:
 * 00000000 - 0000007F: 0xxxxxxx (meaning ASCII characters are themselves)
 * 00000080 - 000007FF: 110xxxxx 10xxxxxx
 * 00000800 - 0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
 * 00010000 - 001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 * 00200000 - 03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 * 04000000 - 7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * The following inputs are undecodable and yield the replacement character:
 *   - a middle byte (0x80 to 0xBF) where a start byte was expected; the whole
 *     run of stray middle bytes is skipped as one bad character,
 *   - a start byte followed by too few middle bytes; the end pointer is left
 *     at the first byte which wasn't a middle byte,
 *   - the bytes 0xFE and 0xFF, which never appear in UTF-8,
 *   - an overlong sequence, meaning one that uses more bytes than the table
 *     above requires for its value (for example 0xC0 0x80 for U+0000); the
 *     whole sequence is skipped.  Without this check an overlong zero would
 *     be reported as character 0, which callers treat as the end of string.
 */

static unsigned long GetNextUTF8Char (
  const char *StringPntr,
  const char **EndPntrPntr)
{
  /* Smallest value which legitimately needs the given number of middle bytes
  (index is the middle byte count, 1 to 5; index 0 is unused). */
  static const unsigned long MinimumForMiddleCount [6] =
    {0x0UL, 0x80UL, 0x800UL, 0x10000UL, 0x200000UL, 0x4000000UL};

  const unsigned char *BytePntr;
  unsigned char Letter;
  int MiddleCount;
  int RemainingBytes;
  unsigned long UnicodeChar;

  BytePntr = (const unsigned char *) StringPntr;
  Letter = *BytePntr++;

  if (Letter == 0)
  {
    /* Don't advance beyond the end of a string. */
    BytePntr--;
    UnicodeChar = Letter;
  }
  else if (Letter <= 0x7F)
  {
    /* Plain ASCII, a single byte which is its own value. */
    UnicodeChar = Letter;
  }
  else if (Letter <= 0xBF)
  {
    /* 0x80 to 0xBF is a middle byte of a multi-byte sequence,
    shouldn't be here at the start of a sequence, just skip it. */
    while (*BytePntr >= 0x80 && *BytePntr <= 0xBF)
      BytePntr++;
    UnicodeChar = REPLACEMENT_UNICODE_CHAR;
  }
  else if (Letter <= 0xFD)
  {
    /* Start of a multi-byte character.  Get the number of following bytes. */
    if (Letter < 0xE0)
      MiddleCount = 1;
    else if (Letter < 0xF0)
      MiddleCount = 2;
    else if (Letter < 0xF8)
      MiddleCount = 3;
    else if (Letter < 0xFC)
      MiddleCount = 4;
    else /* 0xFC or 0xFD */
      MiddleCount = 5;
    RemainingBytes = MiddleCount;

    /* Get the first few bits out of the starting byte. */
    UnicodeChar = (unsigned long) (Letter & ((1 << (6 - RemainingBytes)) - 1));

    /* The remaining bytes should all be middle bytes, which are between 0x80
    and 0xBF.  A NUL terminator isn't a middle byte, so this loop can't run
    past the end of the string. */
    while (RemainingBytes > 0)
    {
      Letter = *BytePntr;
      if (Letter < 0x80 || Letter > 0xBF)
      {
        UnicodeChar = REPLACEMENT_UNICODE_CHAR; /* Bad encoding found. */
        break;
      }
      UnicodeChar = ((UnicodeChar << 6) | (unsigned long) (Letter & 0x3F));
      BytePntr++;
      RemainingBytes--;
    }

    /* A complete sequence must not encode a value which fits in a shorter
    sequence (an "overlong" form). */
    if (RemainingBytes == 0 &&
    UnicodeChar < MinimumForMiddleCount [MiddleCount])
      UnicodeChar = REPLACEMENT_UNICODE_CHAR;
  }
  else
  {
    /* An invalid character, 0xFE or 0xFF, skip it. */
    UnicodeChar = REPLACEMENT_UNICODE_CHAR;
  }

  if (EndPntrPntr != 0)
    *EndPntrPntr = (const char *) BytePntr;
  return UnicodeChar;
}



/******************************************************************************
 * Return TRUE if PATTERN has any special wildcard characters.
 */

BOOLEAN IsUTF8Pattern (const char *Pattern)
{
  const char *Scan;

  /* Walk the bytes up to the terminating NUL, stopping at the first one that
  is a wildcard ('?' or '*'), the start of a set ('[') or the escape
  character ('\').  Bytes are examined individually; no UTF-8 decoding is
  done here. */

  for (Scan = Pattern; *Scan != 0; Scan++)
  {
    if (*Scan == '?' || *Scan == '*' || *Scan == '[' || *Scan == '\\')
      return TRUE;
  }

  /* Reached the end without seeing anything special. */
  return FALSE;
}



/******************************************************************************
 * Return TRUE if PATTERN is a well-formed wildcard pattern according to the
 * syntax described in the MatchUTF8e() comment further down in this file.
 *
 * It also returns a more specific code in the integer pointed to by
 * ErrorCodePntr (NULL if you don't want the specific code).  The specific
 * error codes are:
 *
 *   PATTERN_VALID - pattern is well formed
 *   PATTERN_ESC   - pattern has invalid escape ('\' at end of pattern)
 *   PATTERN_RANGE - [..] construct has no range end in a '-' pair (ie [a-])
 *   PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g )
 *   PATTERN_EMPTY - [..] construct is empty (ie [])
 *   PATTERN_UTF8  - mangled UTF-8 encoding encountered in pattern, note that
 *                   mangled UTF-8 in the text will be considered to be equal
 *                   to the replacement character so that you can still match
 *                   it.
 */

/* Helper for IsValidUTF8Pattern: scans the whole pattern and returns the first
problem found as one of the PATTERN_* codes, or PATTERN_VALID if the scan
reaches the end of the string without finding one. */

static int FindUTF8PatternError (const char *Cursor)
{
  unsigned long Decoded;

  while (*Cursor != 0)
  {
    /* An escape at the top level needs a decodable character after it. */
    if (*Cursor == '\\')
    {
      Cursor++;
      if (*Cursor == 0)
        return PATTERN_ESC;
      if (GetNextUTF8Char (Cursor, &Cursor) == REPLACEMENT_UNICODE_CHAR)
        return PATTERN_UTF8; /* Skipped UTF-8 character is bad. */
      continue;
    }

    /* Everything but '[' (that includes '*' and '?') is acceptable so long as
    it decodes properly. */
    if (*Cursor != '[')
    {
      if (GetNextUTF8Char (Cursor, &Cursor) == REPLACEMENT_UNICODE_CHAR)
        return PATTERN_UTF8;
      continue;
    }

    /* A [..] construct.  Step inside and reject "[]", "[^]" and "[!]", all of
    which are empty sets, then a pattern which stops right after the '['. */
    Cursor++;
    if (*Cursor == ']')
      return PATTERN_EMPTY;
    if ((*Cursor == '^' || *Cursor == '!') && Cursor[1] == ']')
      return PATTERN_EMPTY;
    if (*Cursor == 0)
      return PATTERN_CLOSE;

    /* Check the set members one at a time until the closing bracket.  A
    leading '^' or '!' is simply passed over as an ordinary member here. */
    while (*Cursor != ']')
    {
      if (*Cursor == '\\')
      {
        Cursor++; /* Skip the slash. */
        Decoded = GetNextUTF8Char (Cursor, &Cursor); /* Skip a char. */
        if (Decoded == 0)
          return PATTERN_ESC; /* Nothing after the slash. */
        if (Decoded == REPLACEMENT_UNICODE_CHAR)
          return PATTERN_UTF8;
      }
      else if (GetNextUTF8Char (Cursor, &Cursor) == REPLACEMENT_UNICODE_CHAR)
      {
        return PATTERN_UTF8; /* Ordinary member, but mangled. */
      }

      /* Running out of pattern inside the set means no closing ']'. */
      if (*Cursor == 0)
        return PATTERN_CLOSE;

      /* Only a '-' needs more work, it makes the member a range. */
      if (*Cursor != '-')
        continue;

      /* The range must have an end which is neither missing nor ']'. */
      Cursor++;
      if (*Cursor == 0 || *Cursor == ']')
        return PATTERN_RANGE;

      /* The range end may be escaped, skip the slash too. */
      if (*Cursor == '\\')
      {
        Cursor++;
        if (*Cursor == 0)
          return PATTERN_ESC;
      }

      /* Get the character for the end of the range. */
      if (GetNextUTF8Char (Cursor, &Cursor) == REPLACEMENT_UNICODE_CHAR)
        return PATTERN_UTF8;
    }

    /* Cursor is left on the ']', which the next pass of the outer loop
    consumes as an ordinary character. */
  }

  return PATTERN_VALID;
}


BOOLEAN IsValidUTF8Pattern (const char *Pattern, int *ErrorCodePntr)
{
  int ErrorCode;

  ErrorCode = FindUTF8PatternError (Pattern);

  /* The detailed code is optional, the caller may pass in NULL. */
  if (ErrorCodePntr != 0)
    *ErrorCodePntr = ErrorCode;

  return (ErrorCode == PATTERN_VALID);
}



/******************************************************************************
 * Internal function to handle the rest of the pattern if it starts with a
 * star.  Enters with Pattern pointing to the '*'.  Skip the minimal number of
 * wild card characters (represented by the star and question mark) and
 * recursively see if the rest of the text matches the rest of the pattern
 * (recursively using MatchUTF8e, with a quickie optimization if we know it
 * will fail).  If not, skip one more wild character and try again.  Repeat
 * until matched.  Returns the pattern match error or success code.
 */

static int MatchUTF8AfterStar (const char *Pattern, const char *Text)
{
  const char   *AfterCandidate; /* Text just past the candidate character. */
  unsigned long Anchor;         /* First character the rest of pattern needs. */
  unsigned long Candidate;      /* Text character at the current try point. */
  int           Outcome;        /* Most recent result from MatchUTF8e. */

  /* Use up the run of wildcards at the front of the pattern.  A '*' costs
  nothing, each '?' must swallow exactly one character of text, so running
  out of text here means there can't be a match. */

  for ( ; *Pattern == '?' || *Pattern == '*'; Pattern++)
  {
    if (*Pattern == '*')
      continue;
    if (GetNextUTF8Char (Text, &Text) == 0)
      return MATCH_ABORT;
  }

  /* Nothing after the wildcards: the star swallows whatever text is left. */

  if (*Pattern == 0)
    return MATCH_VALID;

  /* Peek (without moving Pattern) at the character which has to come next.
  It is a literal, possibly escaped, or the '[' starting a set. */

  Anchor = GetNextUTF8Char (Pattern, 0);
  if (Anchor == REPLACEMENT_UNICODE_CHAR)
    return MATCH_PATTERN;

  if (Anchor == '\\')
  {
    /* Look past the slash at the escaped character, which must exist. */
    Anchor = GetNextUTF8Char (Pattern + 1, 0);
    if (Anchor == 0 || Anchor == REPLACEMENT_UNICODE_CHAR)
      return MATCH_PATTERN;
  }

  /* Try each successive position in the text as the place where the star
  stops.  MatchUTF8e is only called when the candidate character equals the
  anchor or the pattern continues with a set (tested on the raw pattern byte
  so that an escaped '[' doesn't count); otherwise the attempt is known to
  fail.  The search ends on success, when the text runs out (MATCH_ABORT) or
  when the pattern is bad (MATCH_PATTERN).  The starting value of zero is
  assumed not to be one of the codes MatchUTF8e returns. */

  Outcome = 0;
  for (;;)
  {
    Candidate = GetNextUTF8Char (Text, &AfterCandidate);

    if (Candidate == 0)
      Outcome = MATCH_ABORT;
    else if (*Pattern == '[' || Candidate == Anchor)
      Outcome = MatchUTF8e (Pattern, Text);

    if (Outcome == MATCH_VALID ||
    Outcome == MATCH_ABORT ||
    Outcome == MATCH_PATTERN)
      return Outcome;

    Text = AfterCandidate; /* Let the star eat one more character. */
  }
}



/******************************************************************************
 * Match the pattern PATTERN against the string TEXT;
 *
 * MatchUTF8e() returns MATCH_VALID if pattern matches, or an errorcode as
 * follows otherwise:
 *
 * MATCH_PATTERN  - bad or malformed pattern.
 * MATCH_LITERAL  - match failed on character match (standard character).
 * MATCH_RANGE    - match failure on character range ([..] construct).
 * MATCH_ABORT    - premature end of text string (pattern longer than
 *                  text string).
 * MATCH_END      - premature end of pattern string (text longer than
 *                  what the pattern called for).
 * MATCH_VALID    - valid match using pattern.
 *
 * A match means the entire string TEXT is used up in matching.
 *
 * In the pattern string:
 *   * matches any sequence of characters (zero or more).
 *   ? matches any character.
 *   [SET] matches any character in the specified set,
 *   [!SET] or [^SET] matches any character not in the specified set.
 *
 *  A set is composed of characters or ranges; a range looks like 'character
 *  hyphen character' (as in 0-9 or A-Z).  [0-9A-Z_] is the set of all upper
 *  case English letters, digits and the underscore expressed as a concise []
 *  pattern construct (equivalent to [0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_]).
 *  Multibyte characters are allowed (ie. UTF-8 unicode characters) and
 *  treated as a single character for matching purposes and range purposes.
 *
 *  To suppress the special syntactic significance of any of `[]*?!^-\',
 *  and match the character exactly, precede it with a `\'.
 */

int MatchUTF8e (const char *Pattern, const char *Text)
{
  BOOLEAN       Invert;      /* is this [..] or [!..] */
  BOOLEAN       MemberMatch; /* have I matched the [..] construct? */
  unsigned long PatternUni;  /* A character from pattern in unicode. */
  unsigned long RangeEnd;    /* Range start and end as decoded */
  unsigned long RangeStart;  /* 32 bit unicode character values. */
  unsigned long TextUni;     /* A character from text in unicode. */

  while (*Pattern)
  {
    /* If this is the end of the text then this is the end of the match.  The
    only thing left in the pattern which can still match is a run of stars,
    each one matching zero characters.  All of them are skipped (not just a
    single one) so that "X**" matches "X", just like "**" matches any
    non-empty text when handled by MatchUTF8AfterStar. */

    if (*Text == 0)
    {
      while (*Pattern == '*')
        Pattern++;
      return (*Pattern == 0) ? MATCH_VALID : MATCH_ABORT;
    }

    /* determine and react to pattern type */

    switch (*Pattern)
    {
      case '?': /* single any character match */
        Pattern++;
        GetNextUTF8Char (Text, &Text);
        break;

      case '*': /* multiple any character match */
        return MatchUTF8AfterStar (Pattern, Text);

      case '[':  /* [..] construct, single member/exclusion character match */
        Pattern++; /* move to beginning of set specification */

        /* check if this is a member match or exclusion match */
        Invert = FALSE;
        if (*Pattern == '!' || *Pattern == '^')
        {
          Invert = TRUE;
          Pattern++;
        }

        /* If closing bracket here then we have a malformed (empty) pattern. */
        if (*Pattern == ']')
          return MATCH_PATTERN;

        /* Get the text character we will be matching against. */
        TextUni = GetNextUTF8Char (Text, &Text);

        MemberMatch = FALSE;
        while (TRUE)
        {
          /* if end of [...] construct then loop is done */
          if (*Pattern == ']')
          {
            Pattern++;
            break;
          }

          /* Allow for escaped characters, now that we're past the ']' test. */
          if (*Pattern == '\\')
            Pattern++;

          /* Get the set member (or start of range).  If end of pattern then
          bad pattern (Missing ']'), same for a mangled character. */
          RangeStart = RangeEnd = GetNextUTF8Char (Pattern, &Pattern);
          if (RangeStart == 0 || RangeStart == REPLACEMENT_UNICODE_CHAR)
            return MATCH_PATTERN;

          /* check for range bar */

          if (*Pattern == '-')
          {
            /* get the range end */
            Pattern++;
            RangeEnd = GetNextUTF8Char (Pattern, &Pattern);
            if (RangeEnd == REPLACEMENT_UNICODE_CHAR)
              return MATCH_PATTERN;

            /* Can't have "[A-]" as a pattern, user should escape the ']'. */
            if (RangeEnd == ']')
              return MATCH_PATTERN;

            /* Handle the special escaped character range end case. */
            if (RangeEnd == '\\')
            {
              RangeEnd = GetNextUTF8Char (Pattern, &Pattern);
              if (RangeEnd == REPLACEMENT_UNICODE_CHAR)
                return MATCH_PATTERN;
            }

            /* Bad pattern if range end isn't there. */
            if (RangeEnd == 0)
              return MATCH_PATTERN;
          }

          /* If the text character is in range then match found.  Make sure the
          range letters have the proper relationship to one another before
          comparison (allow them to be reversed). */

          if (RangeStart < RangeEnd)
          {
            if (TextUni >= RangeStart && TextUni <= RangeEnd)
            {
              MemberMatch = TRUE;
              break;
            }
          }
          else /* Backwards range specification, or a single character. */
          {
            if (TextUni >= RangeEnd && TextUni <= RangeStart)
            {
              MemberMatch = TRUE;
              break;
            }
          }
        } /* End while looking for a match. */

        /* If there was a match in an exclusion set (the inverted case) then no
        match, if there was no match in a member set (the normal situation)
        then no match. */

        if ((Invert && MemberMatch) || (!Invert && !MemberMatch))
          return MATCH_RANGE;

        /* If the character was in the set, the Pattern pointer isn't yet past
        the end of the [...], so skip the rest of the [...] construct if we
        already matched.  Check for pattern errors along the way. */

        if (MemberMatch)
        {
          while (*Pattern != ']')
          {
            /* bad pattern (Missing ']') */
            if (*Pattern == 0)
              return MATCH_PATTERN;

            /* Skip a character in the pattern, including whole escaped one. */
            if (*Pattern == '\\')
              Pattern++;
            if (GetNextUTF8Char (Pattern, &Pattern) ==
            REPLACEMENT_UNICODE_CHAR)
              return MATCH_PATTERN;
          }
          Pattern++; /* Skip past the closing ']'. */
        }
        break;

      case '\\': /* next character is quoted and must match exactly */
        /* move pattern pointer to quoted char and fall through */
        Pattern++;
        /* if end of pattern then we have a bad pattern */
        if (*Pattern == 0)
          return MATCH_PATTERN;
        /* FALLTHROUGH - the quoted character is compared as a literal. */

      default: /* must match this character exactly */
        PatternUni = GetNextUTF8Char (Pattern, &Pattern);
        if (PatternUni == REPLACEMENT_UNICODE_CHAR)
          return MATCH_PATTERN;
        if (PatternUni != GetNextUTF8Char (Text, &Text))
          return MATCH_LITERAL;
    }
  }

  /* if end of text not reached then the pattern fails */

  if (*Text != 0)
    return MATCH_END;

  return MATCH_VALID;
}



/******************************************************************************
 * MatchUTF8() returns TRUE if pattern matches, FALSE otherwise.
 */

BOOLEAN MatchUTF8 (const char *Pattern, const char *Text)
{
  /* Every result from MatchUTF8e other than MATCH_VALID, whether a bad
  pattern or just a mismatch, is reported as a plain failure to match. */
  const int Outcome = MatchUTF8e (Pattern, Text);

  return (Outcome == MATCH_VALID);
}



#ifdef TEST
/******************************************************************************
 * This test main expects as first arg the pattern and as second arg the match
 * string.  Output is yea or nay on match.  If nay on match then the error code
 * is parsed and written.
 */

#include <stdio.h>

int test (const char *Pattern, const char *Text)
{
  const char *Verdict;      /* Final line of output describing the result. */
  int         MatchResult;  /* Code from MatchUTF8e. */
  int         ValidityCode; /* Detailed code from IsValidUTF8Pattern. */

  /* Show what is being tested. */
  printf("Pattern: \"%s\"\n", Pattern);
  printf("Text   : \"%s\"\n", Text);

  /* Strings without any special characters aren't matched at all, and zero
  is returned for them. */
  if (!IsUTF8Pattern (Pattern))
  {
    printf("    First Argument Is Not A Pattern\n");
    return 0;
  }

  /* Run both the matcher and the validator, so that a disagreement between
  the two about the pattern can be reported. */
  MatchResult = MatchUTF8e (Pattern, Text);
  IsValidUTF8Pattern (Pattern, &ValidityCode);

  if (MatchResult == MATCH_VALID)
  {
    printf("    Match Successful");
    if (ValidityCode != PATTERN_VALID)
      Verdict = " -- but is_valid_pattern() is complaining!\n";
    else
      Verdict = "\n";
  }
  else if (MatchResult == MATCH_LITERAL)
    Verdict = "    Match Failed on Literal\n";
  else if (MatchResult == MATCH_RANGE)
    Verdict = "    Match Failed on [..]\n";
  else if (MatchResult == MATCH_ABORT)
    Verdict = "    Match Failed on Early Text Termination\n";
  else if (MatchResult == MATCH_END)
    Verdict = "    Match Failed on Early Pattern Termination\n";
  else if (MatchResult != MATCH_PATTERN)
    Verdict = "    Internal Error in matche()\n";
  /* From here on the matcher said the pattern is bad, let the validator's
  code explain why. */
  else if (ValidityCode == PATTERN_VALID)
    Verdict = "    Internal Disagreement On Pattern\n";
  else if (ValidityCode == PATTERN_ESC)
    Verdict = "    Literal Escape at End of Pattern\n";
  else if (ValidityCode == PATTERN_RANGE)
    Verdict = "    No End of Range in [..] Construct\n";
  else if (ValidityCode == PATTERN_CLOSE)
    Verdict = "    [..] Construct is Open\n";
  else if (ValidityCode == PATTERN_EMPTY)
    Verdict = "    [..] Construct is Empty\n";
  else if (ValidityCode == PATTERN_UTF8)
    Verdict = "    Mangled UTF-8 characters in Pattern\n";
  else
    Verdict = "    Internal Error in is_valid_pattern()\n";

  printf("%s", Verdict);
  return MatchResult;
}


int main (int argc, char *argv[])
{
  /* Some standard test cases: a pattern, the text to match it against and
  the code test() is expected to return.  The first one expects zero because
  "This" has no special characters, so no matching is attempted. */
  static const struct
  {
    const char *Pattern;
    const char *Text;
    int         Expected;
  } StandardCases [] =
  {
    {"This", "That", 0},
    {"Th?s", "That", MATCH_LITERAL},
    {"Th??", "That", MATCH_VALID},
    {"Th???", "That", MATCH_ABORT},
    {"Th?", "That", MATCH_END},
    {"\xC2 Too Short Unicode?", "That", MATCH_PATTERN},
    {"\xC2\xAE\xAF Too Long Unicode?", "\xC2\xAE Thing", MATCH_PATTERN},
    {"\xFF Illegal $FF byte in Unicode?", "That", MATCH_PATTERN},
    {"*:?? Done", "Test replacement char:\xC2\xAE\xAE\xAF\xAE\xAD\xAC Done", MATCH_VALID},
    {"x?z", "x\xC3\x87z", MATCH_VALID},
    {"x[\xE2\x80\xA0-\xE2\x80\xA2]z", "x\xE2\x80\xA1z", MATCH_VALID},
    {"x[\xE2\x80\xA0-\\\xE2\x80\xA2]z", "x\xE2\x80\xA1z", MATCH_VALID},
    {"x[\\]\xE2\x80\xA0-\\\xE2\x80\xA2]z", "x]z", MATCH_VALID},
    {"Th\\", "That", MATCH_PATTERN},
    {"Th[a-", "That", MATCH_PATTERN},
    {"Th[a-]", "That", MATCH_PATTERN},
    {"Th[a-\\", "That", MATCH_PATTERN},
    {"Th[a-\\]", "That", MATCH_PATTERN},
    {"Th[]", "That", MATCH_PATTERN},
    {"Th[!]", "That", MATCH_PATTERN},
    {"Th[^]", "That", MATCH_PATTERN},
    {"Th[a", "That", MATCH_PATTERN},
    {"Th[", "That", MATCH_PATTERN},
    {"Th*?*?*?", "That", MATCH_ABORT},
    {"Th*?t", "That", MATCH_VALID},
    {"Th*?**t*r*e", "Theatre", MATCH_VALID},
    {"T*\\[*]", "The [zot]", MATCH_VALID},
    {"T*\\", "The", MATCH_PATTERN},
    {"X*", "X", MATCH_VALID},
    {"X*y", "X", MATCH_ABORT},
    {"X[!A-Z]y", "X2y", MATCH_VALID},
    {"X[^Z-A]y", "X\xE2\x80\xA2y", MATCH_VALID},
    {"X[\\]]y", "X]y", MATCH_VALID},
    {"X[\\--]]y", "X?y", MATCH_PATTERN},
    {"X[\\--\\]]y", "X?y", MATCH_VALID},
    {"[!XYZ]", "X", MATCH_RANGE},
    {"[abc]z", "bz", MATCH_VALID},
    {"[abcd", "b", MATCH_PATTERN},
    {"[abc]z", "dz", MATCH_RANGE},
    {"\\[", "[", MATCH_VALID}
  };
  const int CaseCount =
    (int) (sizeof (StandardCases) / sizeof (StandardCases [0]));
  int CaseIndex;

  /* Run the cases in order, giving up at the first unexpected result. */
  for (CaseIndex = 0; CaseIndex < CaseCount; CaseIndex++)
  {
    if (test (StandardCases [CaseIndex].Pattern,
    StandardCases [CaseIndex].Text) != StandardCases [CaseIndex].Expected)
    {
      printf ("*** ERROR *** detected, fix the code!\n");
      return -1;
    }
  }

  /* Then try the pattern and text from the command line, if provided. */
  if (argc != 3)
    printf("Usage:  MATCH Pattern Text\n");
  else
    test (argv[1], argv[2]);

  return 0;
}
#endif /* TEST */
